# !conda install -y # plotly
# !pip install chart_studio # seaborn spacy twitter emoji
# !python -m spacy download en_core_web_lg
import twitter, re, emoji, random, operator, os, math, re, string, copy, itertools, pickle, datetime, pandas as pd, numpy as np, matplotlib.pyplot as plt, networkx as nx
from urllib.request import urlopen
from zipfile import ZipFile
from io import BytesIO
from collections import Counter, OrderedDict
import operator
from wordcloud import WordCloud
import pygraphviz
from networkx.drawing.nx_agraph import graphviz_layout
from networkx.algorithms import community
import community as louvain
import spacy
nlp = spacy.load('en_core_web_lg')
# Plotting-related
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
# from matplotlib import pyplot as plt
# from matplotlib.gridspec import GridSpec
from matplotlib.ticker import FuncFormatter
import matplotlib.colors as mcolors
import matplotlib._color_data as mcd
%matplotlib inline
import seaborn as sns
import plotly
from plotly import tools
# import plotly.plotly as py
import chart_studio.plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
#connects JS to notebook so plots work inline
init_notebook_mode(connected=True)
# import bokeh
# from bokeh.io import push_notebook, show, output_notebook
# import bokeh.plotting as bp
# from bokeh.plotting import figure, save, output_file, show
# from bokeh.models import (ColumnDataSource, LabelSet, Label, BoxSelectTool, Circle, EdgesAndLinkedNodes, HoverTool,MultiLine, NodesAndLinkedEdges, Plot, Range1d, TapTool,)
# output_notebook()
# import holoviews as hv
# from holoviews import dim, opts
# hv.extension('bokeh', 'matplotlib')
# from holoviews.operation.datashader import datashade, bundle_graph
# import hvplot.pandas
# from holoviews.operation import gridmatrix
# # import IPython
# from IPython.display import display, HTML, Image
# # hv.notebook_extension()
import bokeh
from bokeh.io import push_notebook, show, output_notebook, save
import bokeh.plotting as bp
from bokeh.plotting import figure, save, output_file, show #, from_networkx
from bokeh.models import (ColumnDataSource, LabelSet, Label, BoxSelectTool, Circle, EdgesAndLinkedNodes, HoverTool,MultiLine, NodesAndLinkedEdges, Plot, Range1d, TapTool,)
from holoviews.element.graphs import layout_nodes
# bokeh.sampledata.download()
from bokeh.sampledata.airport_routes import routes, airports
output_notebook()
import holoviews as hv
from holoviews import dim, opts
hv.extension('bokeh', 'matplotlib')
from holoviews.operation import gridmatrix
from holoviews.operation.datashader import datashade, bundle_graph
from holoviews import Graph, Nodes
from holoviews.plotting.bokeh import GraphPlot, LabelsPlot
import hvplot.networkx as hvnx
import hvplot.pandas
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.simplefilter('ignore')
# # Twitter dataset URL
dataURL='https://www.kaggle.com/omermetinn/tweets-about-the-top-companies-from-2015-to-2020?select=Tweet.csv'
dataName="companies"
# pull the last 20k lines, which are the most recent
!{ head -1 Tweet.csv ; tail -32000 Tweet.csv ;} >tweet_recent_32k.csv
# # Using the Twitter dataset COVID.csv retrieved from https://www.trackmyhashtag.com/blog/free-twitter-datasets/
# z = urlopen(dataURL) #'http://databank.worldbank.org/data/download/WGI_csv.zip')
# myzip = ZipFile(BytesIO(z.read())).extract('COVID.csv')
# odf=pd.read_csv(myzip)
# Using a local copy of the datafile
eodf = pd.read_csv("tweet_recent_32k.csv",encoding = "utf-8")
eodf["post_date"] = pd.to_datetime(eodf["post_date"], unit='s')
print(eodf.dtypes)
print(len(eodf))
eodf.describe
# eodf0=odf[odf['Tweet Language']=='English']
# print(len(eodf0))
tweet_id int64 writer object post_date datetime64[ns] body object comment_num int64 retweet_num int64 like_num int64 dtype: object 32000
<bound method NDFrame.describe of tweet_id writer post_date \
0 1206558872653221889 StckPro 2019-12-16 12:57:06
1 1206558925354483714 seyedoption 2019-12-16 12:57:19
2 1206558990072733696 kevinduffy1929 2019-12-16 12:57:34
3 1206559129583673344 orthereaboot 2019-12-16 12:58:07
4 1206559187746086913 maxjcm 2019-12-16 12:58:21
... ... ... ...
31995 1212159765914079234 TEEELAZER 2019-12-31 23:53:03
31996 1212159838882533376 ShortingIsFun 2019-12-31 23:53:21
31997 1212160015332728833 Commuternyc 2019-12-31 23:54:03
31998 1212160410692046849 MoriaCrypto 2019-12-31 23:55:37
31999 1212160477159206912 treabase 2019-12-31 23:55:53
body comment_num \
0 $AAPL $DELL $GOOG NEW ARTICLE : Apple, Google,... 0
1 $es is now ATH,$amzn, if maximum in first 5-10... 1
2 Great companies are compounding machines, but ... 1
3 General disclosure:- I don’t ever give investm... 4
4 $AAPL privacy-preserving #MachineLearning to i... 0
... ... ...
31995 That $SPY $SPX puuump in the last hour was the... 1
31996 In 2020 I may start Tweeting out positive news... 0
31997 Patiently Waiting for the no twitter sitter tw... 0
31998 I don't discriminate. I own both $aapl and $ms... 1
31999 $AAPL #patent 10,522,475 Vertical interconnect... 0
retweet_num like_num
0 0 0
1 0 2
2 5 2
3 6 40
4 3 4
... ... ...
31995 0 6
31996 0 1
31997 0 5
31998 0 1
31999 0 0
[32000 rows x 7 columns]>
# Raw tokens that are never valid entities (retweet markers etc.)
rem=["PS","RT","rt","ps"]
# spaCy entity labels to discard: numeric/temporal entity types carry no topic signal
reml=["DATE","TIME","PERCENT","MONEY","QUANTITY","ORDINAL","CARDINAL"]
# Hand-curated blacklist of junk strings observed in the NER output
# (mis-encoded unicode, headline fragments, stray punctuation runs)
remf=[" United States","+","10.844","11.301","After","In","UPDATE","#medics","𝐀void","𝐓𝐡𝐞 𝐜𝐨𝐦𝐩𝐚𝐧𝐢𝐞𝐬 𝐚𝐫𝐞𝐧‘𝐭","𝐔se","𝐖ash","𝐘𝐎𝐔𝐑𝐒𝐄𝐋𝐅","𝐢𝐭’𝐬 𝐣𝐮𝐬𝐭","𝗩𝗶𝗿𝘂𝘀 𝗨𝗽𝗱𝗮𝘁𝗲","𝙊𝙗𝙨𝙚𝙧𝙫𝙖𝙩𝙞𝙤𝙣","Wash","‘Chen Qiushi","‘Diamond Princess’","‘Thermometer Guns’","• Sanofi","₹150 cr","≥4","⠀ -","~34K","+2,420","~900","y.","<","A Look at USA 2009",
"‘Diamond Princess","‘Thermometer Guns","⠀ -⠀","️ United States","️+","️10.844","️11.301","️After","️In","️UPDATE"]
def get_clean_ents(doc):
    """Run spaCy NER over one tweet body and return a cleaned list of entity strings.

    Filtering pipeline:
      * drop entities whose label is in ``reml`` (numeric/temporal types);
      * drop noise tokens listed in ``rem`` and anything starting with a digit;
      * drop entities containing twitter markup or quoting (` " # @) or
        starting/ending with stray punctuation;
      * strip emoji, URLs and assorted junk substrings;
      * strip leading articles/fillers and trailing punctuation (cascade);
      * drop anything left that appears in the blacklist ``remf``.

    Fix vs. the previous version: the old cascade of independent ``if``
    blocks (plus a trailing ``else``) could append the same entity two or
    three times (e.g. "the Apple." produced ["Apple.", "Apple", "Apple"]).
    Each surviving entity is now appended exactly once.
    """
    # leading fragments removed in cascade (an entity may match several in turn)
    prefixes = ("the ", "The ", "our ", "you ", "an ", "yes, ", "with ", "h/t ")
    # trailing punctuation removed in cascade
    suffixes = ("..", ".", "’", "/")
    t = []
    for ent in nlp(doc).ents:
        if ent.label_ in reml:
            continue
        et = ent.text
        if et in rem or et[0].isdigit():
            continue
        # entities carrying twitter markup or quoting are noise
        if any(ch in et for ch in ('`', '"', '#', '@')):
            continue
        if et.startswith((".", ",", "-", "&", "'", "+", "=", "A ", "A.")) or et.endswith("-"):
            continue
        # strip emoji, then URLs, then assorted junk substrings
        et = emoji.get_emoji_regexp().sub(r'', et)
        et = re.sub(r'(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))', '', et).strip()
        et = et.replace(" &", "").replace(" |", "").replace("’s", "").replace("'", "").replace("amp;/", "").replace("w/", "").replace("w/ 5", "").replace("y’", "").replace("|", "").replace("◆", "").replace("On", "").strip()
        # length/content sanity filters (short-circuit protects the et[0] index)
        if not (1 < len(et) < 40) or et[0].isdigit():
            continue
        if et in ("&", "━━━━━━━━━━━━", "t.", "release - Office of Information Office of the Permanent"):
            continue
        # NOTE: .replace() removes *every* occurrence, matching the old code's
        # behaviour rather than a strict prefix/suffix removal
        for p in prefixes:
            if et.startswith(p):
                et = et.replace(p, "").strip()
        for s in suffixes:
            if et.endswith(s):
                et = et.replace(s, "").strip()
        if len(et) > 1 and et not in remf:
            t.append(et)
    return t
# eodf=eodf0.copy()
# Work on a 5k random sample to keep the spaCy NER runtime manageable
eodf = eodf.sample(n=5000)
eodf = eodf[['post_date', 'writer', 'body']]
Hashtags_List = []
No_Hashtags_List = []
Mentioned_Tweeple_List = []
No_Mentioned_Tweeple_List = []
Named_Entities = []
No_Named_Entities = []
print('running entity recognition')
for i, body in enumerate(eodf['body']):
    # hashtags / mentions come straight from the raw text
    htl = re.findall(r"#(\w+)", body)
    Hashtags_List.append(htl)
    No_Hashtags_List.append(len(htl))
    mtl = re.findall(r"@(\w+)", body)
    Mentioned_Tweeple_List.append(mtl)
    No_Mentioned_Tweeple_List.append(len(mtl))
    # run the (expensive) NER pipeline exactly once per tweet; the old code
    # called get_clean_ents() twice per row and doubled the spaCy runtime
    ents = get_clean_ents(body)
    Named_Entities.append(ents)
    No_Named_Entities.append(len(ents))
    if i % 1000 == 0:
        print('\tprocessed tweet {} of {}'.format(i, len(eodf)))
eodf["Hashtags List"] = Hashtags_List
eodf["No. of Hashtags List"] = No_Hashtags_List
eodf["Mentioned Tweeple List"] = Mentioned_Tweeple_List
eodf["No. of Mentioned Tweeple List"] = No_Mentioned_Tweeple_List
eodf["Named Entities"] = Named_Entities
eodf["No. of Named Entities"] = No_Named_Entities
eodf = eodf.rename(columns={"writer":"Sender Tweeple","post_date":"Time","body":"Text"})
eodf = eodf[["Time","Sender Tweeple","Hashtags List","No. of Hashtags List","Mentioned Tweeple List","No. of Mentioned Tweeple List","Named Entities","No. of Named Entities","Text"]]
print(len(eodf))
eodf
running entity recognition processed tweet 0 of 5000 processed tweet 1000 of 5000 processed tweet 2000 of 5000 processed tweet 3000 of 5000 processed tweet 4000 of 5000 5000
| Time | Sender Tweeple | Hashtags List | No. of Hashtags List | Mentioned Tweeple List | No. of Mentioned Tweeple List | Named Entities | No. of Named Entities | Text | |
|---|---|---|---|---|---|---|---|---|---|
| 15190 | 2019-12-23 16:04:30 | tradesbeta | [] | 0 | [] | 0 | [TSLA] | 1 | $TSLA four twenty |
| 13855 | 2019-12-23 04:03:04 | JonBryant421 | [] | 0 | [] | 0 | [aapl, Palm, aapl, Newton] | 4 | A very apt comparison. Palm was a startup with... |
| 18695 | 2019-12-24 20:41:24 | MMike2016 | [RIPDiesel, nomorefud] | 2 | [] | 0 | [TSLAQ FUDster] | 1 | $TSLAQ FUDster short squeeze of the century se... |
| 2230 | 2019-12-16 22:23:07 | fhurst4762 | [] | 0 | [timseymour] | 1 | [TSLA] | 1 | Please don't disappoint us @timseymour and be ... |
| 9461 | 2019-12-19 23:28:23 | GenProfits | [] | 0 | [oilTrader69, countertrend] | 2 | [] | 0 | @oilTrader69 @countertrend $tsla that’s a gamb... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 12857 | 2019-12-22 01:05:00 | Benzinga | [] | 0 | [] | 0 | [Benzinga, Apple, Microsoft, Netflix, AAPL, AC... | 9 | Benzinga's Bulls And Bears Of The Week: Apple,... |
| 21007 | 2019-12-26 16:50:55 | WallStSai | [] | 0 | [] | 0 | [AMZN] | 1 | $AMZN More trades Closed all these calls with ... |
| 6313 | 2019-12-18 16:25:00 | NaN | [] | 0 | [] | 0 | [TSLA, IVE] | 2 | Mom's about given up on me as well. Unless $TS... |
| 2425 | 2019-12-17 00:20:43 | ClassActionLaw | [AutoNews, Tesla, Autopilot, FCA, defective, t... | 6 | [] | 0 | [US, US, Tesla, FCA] | 4 | #AutoNews:U.S. agency probes 12th #Tesla crash... |
| 28655 | 2019-12-30 16:52:32 | stock_tidbits | [] | 0 | [] | 0 | [] | 0 | $NIO Next Wave/Surge coming!!$SPY $SPX $DIA $Q... |
5000 rows × 9 columns
# eodf.to_pickle('eodf.pkl')
# eodf = pd.read_pickle("eodf.pkl")
# Ensure Time is a proper datetime (no-op if it already is datetime64)
eodf["Time"] = pd.to_datetime(eodf["Time"])
# eodf.drop('Language', axis=1, inplace=True)
# Raw tweet text is no longer needed after feature extraction
eodf.drop('Text', axis=1, inplace=True)
# Normalise the column names used by the rest of the notebook
eodf=eodf.rename(columns={"No. of Hashtags List":"No. of Hashtags","No. of Mentioned Tweeple List":"No. of Mentioned Tweeple","Named Entities":"Named Entities List"})
eodf = eodf.sort_values(by="Time")
print(len(eodf))
eodf
5000
| Time | Sender Tweeple | Hashtags List | No. of Hashtags | Mentioned Tweeple List | No. of Mentioned Tweeple | Named Entities List | No. of Named Entities | |
|---|---|---|---|---|---|---|---|---|
| 2 | 2019-12-16 12:57:34 | kevinduffy1929 | [passivebubble] | 1 | [] | 0 | [AAPL, SPX, NDX] | 3 |
| 10 | 2019-12-16 13:02:16 | AutobahnTrading | [] | 0 | [] | 0 | [AAPL] | 1 |
| 13 | 2019-12-16 13:06:26 | bullishstocks7 | [] | 0 | [] | 0 | [XOM] | 1 |
| 15 | 2019-12-16 13:08:22 | 16Trippinborys | [] | 0 | [] | 0 | [AAPL] | 1 |
| 25 | 2019-12-16 13:15:59 | NaN | [] | 0 | [] | 0 | [Apple, AAPL] | 2 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 31991 | 2019-12-31 23:50:32 | wilwen2 | [] | 0 | [] | 0 | [Chinese] | 1 |
| 31992 | 2019-12-31 23:50:45 | MelaynaLokosky | [Autopilot] | 1 | [SenMarkey, NHTSAgov, FTC] | 3 | [Tesla] | 1 |
| 31994 | 2019-12-31 23:51:06 | GMGRIFF_79 | [] | 0 | [] | 0 | [AAPL] | 1 |
| 31997 | 2019-12-31 23:54:03 | Commuternyc | [] | 0 | [] | 0 | [] | 0 |
| 31998 | 2019-12-31 23:55:37 | MoriaCrypto | [] | 0 | [] | 0 | [aapl, msft] | 2 |
5000 rows × 8 columns
# Flatten the per-tweet lists into corpus-wide lists; the trailing comments
# record the counts seen on the full dataset. Iterating the Series directly
# replaces the old range(len())/.iloc loop (one O(n) lookup per row).
AllNamedEntities = [e for ents in eodf["Named Entities List"] for e in ents]
print("NameEntities",len(AllNamedEntities),len(set(AllNamedEntities))) #71586 7109
AllHashtags = [tag for tags in eodf["Hashtags List"] for tag in tags]
print("Hashtags",len(AllHashtags),len(set(AllHashtags))) #74317 6339
AllMentions = [m for ms in eodf["Mentioned Tweeple List"] for m in ms]
print("Mentions",len(AllMentions),len(set(AllMentions))) #37728 6662
NameEntities 8827 2145 Hashtags 2963 1078 Mentions 1529 562
eodfd=eodf.copy()
# eodfd['Time'] = pd.to_datetime(eodfd['Time'])
# Add a day-level Date column (round-trip through a day string truncates
# the timestamp to midnight) for daily grouping
eodfd['Date'] = eodfd['Time'].dt.strftime('%d %b %Y')
eodfd["Date"] = pd.to_datetime(eodfd["Date"])
# Day=[]
# for i in range(len(eodfd)):
# t=eodfd.iloc[i]['Time']
# d=t.split(" ")
# Day.append(d[0])
# eodfd["Day"]=Day
# NOTE(review): this rename is a no-op — the column was already renamed to
# "Named Entities List" earlier; kept defensively
eodfd=eodfd.rename(columns={"Named Entities":"Named Entities List"})
eodfd=eodfd[["Time","Date",'Sender Tweeple', 'Hashtags List', 'No. of Hashtags', 'Mentioned Tweeple List', 'No. of Mentioned Tweeple', 'Named Entities List', 'No. of Named Entities']]
eodfd=eodfd.sort_values(by="Date")
print(len(eodfd))
eodfd
5000
| Time | Date | Sender Tweeple | Hashtags List | No. of Hashtags | Mentioned Tweeple List | No. of Mentioned Tweeple | Named Entities List | No. of Named Entities | |
|---|---|---|---|---|---|---|---|---|---|
| 2 | 2019-12-16 12:57:34 | 2019-12-16 | kevinduffy1929 | [passivebubble] | 1 | [] | 0 | [AAPL, SPX, NDX] | 3 |
| 1639 | 2019-12-16 19:18:36 | 2019-12-16 | TeslaNY | [Tesla] | 1 | [] | 0 | [Tesla, Jonas, Morgan Stanley, TSLA] | 4 |
| 1638 | 2019-12-16 19:18:30 | 2019-12-16 | Mark_Lexus | [diagonalSpread] | 1 | [] | 0 | [] | 0 |
| 1631 | 2019-12-16 19:18:01 | 2019-12-16 | papaspoppinpeny | [] | 0 | [] | 0 | [EXMGF News, XOM, JBL, TM, ABML] | 5 |
| 1615 | 2019-12-16 19:16:38 | 2019-12-16 | c4chaos | [] | 0 | [] | 0 | [bank!Tesla Twitter] | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 30650 | 2019-12-31 14:36:47 | 2019-12-31 | TeslaDiehardFan | [] | 0 | [jjhanna2] | 1 | [] | 0 |
| 30639 | 2019-12-31 14:34:06 | 2019-12-31 | hugobesley | [] | 0 | [] | 0 | [TSLA] | 1 |
| 30628 | 2019-12-31 14:30:07 | 2019-12-31 | 247WallSt | [] | 0 | [] | 0 | [AMZN, AAPL, MAXR, TSLA] | 4 |
| 30607 | 2019-12-31 14:23:00 | 2019-12-31 | realsheepwolf | [] | 0 | [] | 0 | [THMO, ONTX, BNTX, ADBE, MSFT, AAPL, SIRI, HSDT] | 8 |
| 31998 | 2019-12-31 23:55:37 | 2019-12-31 | MoriaCrypto | [] | 0 | [] | 0 | [aapl, msft] | 2 |
5000 rows × 9 columns
eodfd1=eodfd.copy()
# One row per (sender, day): collect that sender's timestamps / hashtag lists /
# mention lists / entity lists for the day into lists-of-lists
gdf000=eodfd1.groupby(['Sender Tweeple',"Date"]).agg({'Time':lambda x: list(x),'Hashtags List': lambda x: list(x),'Mentioned Tweeple List':lambda x: list(x),'Named Entities List': lambda x: list(x)}).reset_index() #
# tweets per sender per day = number of collected timestamps
gdf000["no_tweets"]=gdf000.Time.map(len)
gdf000=gdf000.sort_values(by="Date")
print(len(gdf000))
gdf000
3827
| Sender Tweeple | Date | Time | Hashtags List | Mentioned Tweeple List | Named Entities List | no_tweets | |
|---|---|---|---|---|---|---|---|
| 0 | 025shields | 2019-12-16 | [2019-12-16 21:00:26] | [[]] | [[]] | [[TSLA]] | 1 |
| 2653 | dyip96 | 2019-12-16 | [2019-12-16 15:02:46] | [[]] | [[]] | [[]] | 1 |
| 2658 | echarkov | 2019-12-16 | [2019-12-16 14:58:13] | [[ES_F]] | [[]] | [[NDX, TSLA, QQQ]] | 1 |
| 641 | EveryoneSaysHi1 | 2019-12-16 | [2019-12-16 21:56:14] | [[]] | [[]] | [[]] | 1 |
| 634 | EscobarTrades | 2019-12-16 | [2019-12-16 16:27:46] | [[]] | [[HCPG]] | [[tsla]] | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1842 | Supermantibody | 2019-12-31 | [2019-12-31 17:17:48] | [[BabyYoda]] | [[]] | [[BabyYoda]] | 1 |
| 1660 | Sarcastic_Azzz | 2019-12-31 | [2019-12-31 20:46:24] | [[]] | [[]] | [[FED, AAPL, upSales]] | 1 |
| 759 | GoingGreenPicks | 2019-12-31 | [2019-12-31 17:25:34, 2019-12-31 12:22:14] | [[], []] | [[], []] | [[HTSC, DTC, ALPP, MTCH, PHVAF], [HTSC, DTC, C... | 2 |
| 1664 | Scalp_City | 2019-12-31 | [2019-12-31 01:53:00] | [[]] | [[]] | [[AAPL, AMZN, TSLA, ROKU]] | 1 |
| 2587 | davidmoadel | 2019-12-31 | [2019-12-31 16:30:20, 2019-12-31 16:19:02, 201... | [[stockmarket, investing, finance, stocks, gol... | [[investorplace], [], [investorplace], [invest... | [[Chesapeake Energy, USO, INTC, NFLX, TLT], [S... | 5 |
3827 rows × 7 columns
eodfd2=eodfd.copy()
# One row per day: collect every timestamp, sender, hashtag list, mention list
# and entity list of the day into lists (of lists, for the per-tweet columns)
gdf=eodfd2.groupby(["Date"]).agg({'Time':lambda x: list(x),'Sender Tweeple': lambda x: list(x),'Hashtags List': lambda x: list(x),'Mentioned Tweeple List':lambda x: list(x),'Named Entities List': lambda x: list(x)}).reset_index() #
# tweets per day = number of collected timestamps
gdf["No. of Tweets"]=gdf.Time.map(len)
# gdf["no_hashtags"]=gdf.hashtags.map(len)
# gdf["no_senders"]=gdf.screen_name.map(len)
gdf["Date"] = pd.to_datetime(gdf["Date"])
gdf=gdf.rename(columns={"Sender Tweeple":"Sender Tweeple List"})
gdf=gdf[["Date","Time","No. of Tweets","Sender Tweeple List","Hashtags List","Mentioned Tweeple List",'Named Entities List']]
gdf =gdf.sort_values(by="Date")
print(len(gdf))
gdf
16
| Date | Time | No. of Tweets | Sender Tweeple List | Hashtags List | Mentioned Tweeple List | Named Entities List | |
|---|---|---|---|---|---|---|---|
| 0 | 2019-12-16 | [2019-12-16 12:57:34, 2019-12-16 19:18:36, 201... | 364 | [kevinduffy1929, TeslaNY, Mark_Lexus, papaspop... | [[passivebubble], [Tesla], [diagonalSpread], [... | [[], [], [], [], [], [sibonobo, PandoraMusic],... | [[AAPL, SPX, NDX], [Tesla, Jonas, Morgan Stanl... |
| 1 | 2019-12-17 | [2019-12-17 17:30:17, 2019-12-17 17:29:28, 201... | 385 | [newsomenuggets, OphirGottlieb, DenisGobo, JCO... | [[], [], [], [], [], [], [], [], [], [], [equi... | [[TrendSpider], [barronsonline], [], [], [], [... | [[TSLA], [Cowen], [Google, CloudThe Google, Mi... |
| 2 | 2019-12-18 | [2019-12-18 19:07:12, 2019-12-18 19:24:03, 201... | 495 | [John54436259, KingMidasAu1, EliteOptions2, Eq... | [[], [], [], [], [], [], [trading, OptionsTrad... | [[], [], [], [], [], [], [], [elonmusk], [], [... | [[China], [SBES Keep, aapl], [TSLA], [Tesla, T... |
| 3 | 2019-12-19 | [2019-12-19 19:47:18, 2019-12-19 19:55:42, 201... | 224 | [commandenteSD, TSOH_Investing, realsheepwolf,... | [[], [], [], [], [CNBC, Tesla, china, Cybertru... | [[3Clicksinvest], [], [], [], [CNBC], [], [Oph... | [[AMZN], [AWS, AWS, Redis Labs, Redis, AMZN], ... |
| 4 | 2019-12-20 | [2019-12-20 17:30:18, 2019-12-20 17:03:36, 201... | 378 | [DentonBlackwell, RichLightShed, kaiserassocte... | [[], [], [stocks], [Equity, Options], [], [The... | [[], [], [themotleyfool], [], [], [], [], [], ... | [[AMZN, GOOOOOOOOO], [Apple, AAPL, AMZN], [Ama... |
| 5 | 2019-12-21 | [2019-12-21 17:59:14, 2019-12-21 17:51:36, 201... | 155 | [ChrisSpelter, MarcoMNYC, Commuternyc, Bearing... | [[], [], [], [], [], [], [], [], [], [], [], [... | [[], [], [], [], [RobinhoodApp], [], [], [], [... | [[US, Netherlands, Time], [AMZN, Holyoke, Mass... |
| 6 | 2019-12-22 | [2019-12-22 17:16:34, 2019-12-22 17:16:38, 201... | 133 | [TalkMarkets, vincent13031925, Insurmountabl1,... | [[], [By, Tesla, Netherlands], [], [ElonMuskIs... | [[], [EvaFoxU, flcnhvy, Tesmanian_com], [], [e... | [[], [Tesla EOQ, Netherlands], [], [Elon Musk,... |
| 7 | 2019-12-23 | [2019-12-23 17:27:50, 2019-12-23 17:29:33, 201... | 544 | [seyedoption, AnalysisFin, Viidakkotimppa, Kam... | [[], [investors], [], [], [DayTrading, PROCESS... | [[], [], [], [CGrantWSJ], [], [], [], [], [cnb... | [[aapl], [], [TSLA, Tesla, God-King Musk], [Tw... |
| 8 | 2019-12-24 | [2019-12-24 16:57:27, 2019-12-24 16:56:11, 201... | 252 | [Roka_Dura, econalert, shortvolumes, marketmin... | [[], [patent, tech, IP, research], [], [ES_F, ... | [[], [], [], [], [], [], [], [], [ValueAnalyst... | [[TSLA], [Feeder], [$IBB, AMZN], [SPX, AAPL, Q... |
| 9 | 2019-12-25 | [2019-12-25 17:12:13, 2019-12-25 16:34:03, 201... | 82 | [MMASSASSIN, ModelYendofICE, xsaltwedgex, Cryp... | [[], [], [], [tech, technology, TechNews, inve... | [[], [], [], [], [], [], [], [], [BagholderQuo... | [[GWH, GWH, TSLA, China, Germany], [TESLA], [T... |
| 10 | 2019-12-26 | [2019-12-26 18:41:49, 2019-12-26 18:42:55, 201... | 504 | [15minofPham, MalibuInvest, Bazooka_J0E, TheBu... | [[], [], [], [estate], [FLOWTRADE], [], [], []... | [[], [MalibuPrivateThink], [], [], [RealFlowTr... | [[], [GOOGL], [TSLA, Saturn], [ROKU, PCG], [AM... |
| 11 | 2019-12-27 | [2019-12-27 17:18:44, 2019-12-27 17:43:31, 201... | 410 | [stockhoot, commandenteSD, Roka_Dura, peedeehe... | [[TradeIdea], [], [], [Tesla, Daimler, BMW, GM... | [[], [], [], [], [], [], [], [], [VendbienJon]... | [[AMZN - Winning Options, James Catzen], [AAPL... |
| 12 | 2019-12-28 | [2019-12-28 18:30:24, 2019-12-28 17:44:10, 201... | 163 | [calgarybourbon1, Biggie_Calls, StockTradePick... | [[], [], [], [], [fright], [Win7toWin10], [], ... | [[elonmusk], [], [], [], [], [], [], [], [], [... | [[TSLA, TESLA, Elon], [Elon “king”], [TSLA], [... |
| 13 | 2019-12-29 | [2019-12-29 18:55:35, 2019-12-29 18:36:06, 201... | 161 | [vincent13031925, SteelNicho, HedgehogOptions,... | [[Tesla], [], [Frozen2, AvengersEndgame, LionK... | [[], [], [RobertIger], [], [], [], [], [], [19... | [[Tesla, Tesla], [Canadians], [Spydey Goat, NF... |
| 14 | 2019-12-30 | [2019-12-30 17:59:59, 2019-12-30 18:00:11, 201... | 417 | [MaxSpeculation, ntfri, xrobertm, FinTwitTSLA,... | [[], [], [], [], [], [], [], [Tesla, Israel], ... | [[], [], [], [], [], [OptionsPastor], [], [], ... | [[aapl, aapl], [AAPL], [AAPL], [], [NIO], [SPY... |
| 15 | 2019-12-31 | [2019-12-31 18:09:23, 2019-12-31 17:56:23, 201... | 333 | [HulkCapital, JerryTauber, realsheepwolf, john... | [[], [], [], [], [], [], [], [], [], [], [], [... | [[], [AskGeorgeBailey, YoelMinkoff], [], [], [... | [[], [Apple, AAPL, Facebook, FB], [ONTX, BNTX,... |
def flatli(nested):
    """Flatten one level of nesting (a list of lists) into a single list.

    The parameter was renamed from ``list`` to avoid shadowing the builtin.
    All callers pass it positionally, so the rename is safe.
    """
    return [item for sublist in nested for item in sublist]
# Derive per-day total/unique counts and flattened lists by mapping over the
# list-valued columns (vectorised .map replaces the old index loop with one
# .iloc lookup per row per column).
def _uniq(items):
    """Number of distinct elements in a list."""
    return len(set(items))

gdf["No. of Senders Tweeple"] = gdf["Sender Tweeple List"].map(len)
gdf["No. of Unique Senders Tweeple"] = gdf["Sender Tweeple List"].map(_uniq)
# hashtags/mentions/entities are lists of per-tweet lists -> flatten first
gdf["List of Hashtags"] = gdf["Hashtags List"].map(flatli)
gdf["No. of Hashtags"] = gdf["List of Hashtags"].map(len)
gdf["No. of Unique Hashtags"] = gdf["List of Hashtags"].map(_uniq)
gdf["List of Mentioned Tweeple"] = gdf["Mentioned Tweeple List"].map(flatli)
gdf["No. of Mentioned Tweeple"] = gdf["List of Mentioned Tweeple"].map(len)
gdf["No. of Unique Mentioned Tweeple"] = gdf["List of Mentioned Tweeple"].map(_uniq)
gdf["List of Named Entities"] = gdf["Named Entities List"].map(flatli)
gdf["No. of Named Entities"] = gdf["List of Named Entities"].map(len)
gdf["No. of Unique Named Entities"] = gdf["List of Named Entities"].map(_uniq)
# Keep only the columns used downstream (totals are dropped, uniques kept)
gdf=gdf[["Date","Time","No. of Tweets","Sender Tweeple List","No. of Unique Senders Tweeple","List of Hashtags","No. of Unique Hashtags","List of Mentioned Tweeple","No. of Unique Mentioned Tweeple","List of Named Entities","No. of Unique Named Entities"]]
gdf
| Date | Time | No. of Tweets | Sender Tweeple List | No. of Unique Senders Tweeple | List of Hashtags | No. of Unique Hashtags | List of Mentioned Tweeple | No. of Unique Mentioned Tweeple | List of Named Entities | No. of Unique Named Entities | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2019-12-16 | [2019-12-16 12:57:34, 2019-12-16 19:18:36, 201... | 364 | [kevinduffy1929, TeslaNY, Mark_Lexus, papaspop... | 287 | [passivebubble, Tesla, diagonalSpread, Unusual... | 119 | [sibonobo, PandoraMusic, TFNN, elonmusk, jimcr... | 85 | [AAPL, SPX, NDX, Tesla, Jonas, Morgan Stanley,... | 254 |
| 1 | 2019-12-17 | [2019-12-17 17:30:17, 2019-12-17 17:29:28, 201... | 385 | [newsomenuggets, OphirGottlieb, DenisGobo, JCO... | 288 | [equitymarket, marketanalysis, g7, TeslaServic... | 135 | [TrendSpider, barronsonline, CheddarFlow, WSJ,... | 85 | [TSLA, Cowen, Google, CloudThe Google, Microso... | 280 |
| 2 | 2019-12-18 | [2019-12-18 19:07:12, 2019-12-18 19:24:03, 201... | 495 | [John54436259, KingMidasAu1, EliteOptions2, Eq... | 368 | [trading, OptionsTrading, Christmas, BuffaloBi... | 156 | [elonmusk, SEC_Investor_Ed, SEC_Enforcement, e... | 97 | [China, SBES Keep, aapl, TSLA, Tesla, Tesla, T... | 328 |
| 3 | 2019-12-19 | [2019-12-19 19:47:18, 2019-12-19 19:55:42, 201... | 224 | [commandenteSD, TSOH_Investing, realsheepwolf,... | 185 | [CNBC, Tesla, china, Cybertruck, FSD, EV, Sola... | 181 | [3Clicksinvest, CNBC, OphirGottlieb, saxena_pu... | 52 | [AMZN, AWS, AWS, Redis Labs, Redis, AMZN, ZM, ... | 219 |
| 4 | 2019-12-20 | [2019-12-20 17:30:18, 2019-12-20 17:03:36, 201... | 378 | [DentonBlackwell, RichLightShed, kaiserassocte... | 307 | [stocks, Equity, Options, TheIndependent, US, ... | 161 | [themotleyfool, elonmusk, elonmusk, JimBridens... | 103 | [AMZN, GOOOOOOOOO, Apple, AAPL, AMZN, Amazon, ... | 270 |
| 5 | 2019-12-21 | [2019-12-21 17:59:14, 2019-12-21 17:51:36, 201... | 155 | [ChrisSpelter, MarcoMNYC, Commuternyc, Bearing... | 129 | [maxpain, options, ps60, Tesla, TeslaServiceIs... | 86 | [RobinhoodApp, Lebeaucarnews, BlackBerry, WSJ,... | 36 | [US, Netherlands, Time, AMZN, Holyoke, Mass, U... | 158 |
| 6 | 2019-12-22 | [2019-12-22 17:16:34, 2019-12-22 17:16:38, 201... | 133 | [TalkMarkets, vincent13031925, Insurmountabl1,... | 108 | [By, Tesla, Netherlands, ElonMuskIsNotACrimina... | 84 | [EvaFoxU, flcnhvy, Tesmanian_com, elonmusk, Ne... | 50 | [Tesla EOQ, Netherlands, Elon Musk, SEC, Tsla,... | 133 |
| 7 | 2019-12-23 | [2019-12-23 17:27:50, 2019-12-23 17:29:33, 201... | 544 | [seyedoption, AnalysisFin, Viidakkotimppa, Kam... | 424 | [investors, DayTrading, PROCESS, R, StayGreen,... | 202 | [CGrantWSJ, cnbc, elonmusk, elonmusk, RedlerAl... | 101 | [aapl, TSLA, Tesla, God-King Musk, Twitter, TS... | 347 |
| 8 | 2019-12-24 | [2019-12-24 16:57:27, 2019-12-24 16:56:11, 201... | 252 | [Roka_Dura, econalert, shortvolumes, marketmin... | 216 | [patent, tech, IP, research, ES_F, trading, GF... | 91 | [ValueAnalyst1, RobinhoodApp, elonmusk, Tesla,... | 51 | [TSLA, Feeder, $IBB, AMZN, SPX, AAPL, QQQ, Mob... | 215 |
| 9 | 2019-12-25 | [2019-12-25 17:12:13, 2019-12-25 16:34:03, 201... | 82 | [MMASSASSIN, ModelYendofICE, xsaltwedgex, Cryp... | 67 | [tech, technology, TechNews, investing, CyberS... | 71 | [BagholderQuotes, TylerKeepers, BigBoyEby, Tyl... | 31 | [GWH, GWH, TSLA, China, Germany, TESLA, TSLA, ... | 85 |
| 10 | 2019-12-26 | [2019-12-26 18:41:49, 2019-12-26 18:42:55, 201... | 504 | [15minofPham, MalibuInvest, Bazooka_J0E, TheBu... | 354 | [estate, FLOWTRADE, StockMarketNews, Learntotr... | 135 | [MalibuPrivateThink, RealFlowTrade, Wedbush, T... | 51 | [GOOGL, TSLA, Saturn, ROKU, PCG, AMZN, AMZN, S... | 276 |
| 11 | 2019-12-27 | [2019-12-27 17:18:44, 2019-12-27 17:43:31, 201... | 410 | [stockhoot, commandenteSD, Roka_Dura, peedeehe... | 327 | [TradeIdea, Tesla, Daimler, BMW, GM, Ford, Lit... | 190 | [VendbienJon, elonmusk, SpaceX, neuralink, elo... | 79 | [AMZN - Winning Options, James Catzen, AAPL, l... | 302 |
| 12 | 2019-12-28 | [2019-12-28 18:30:24, 2019-12-28 17:44:10, 201... | 163 | [calgarybourbon1, Biggie_Calls, StockTradePick... | 125 | [fright, Win7toWin10, WallStreet, earnings, st... | 72 | [elonmusk, philiped, stockspastor, Arrayit, th... | 23 | [TSLA, TESLA, Elon, Elon “king”, TSLA, TSLA, k... | 132 |
| 13 | 2019-12-29 | [2019-12-29 18:55:35, 2019-12-29 18:36:06, 201... | 161 | [vincent13031925, SteelNicho, HedgehogOptions,... | 120 | [Tesla, Frozen2, AvengersEndgame, LionKing, Fr... | 92 | [RobertIger, 1955http, WallStCynic, iluvstocks... | 53 | [Tesla, Tesla, Canadians, Spydey Goat, NFLX, A... | 147 |
| 14 | 2019-12-30 | [2019-12-30 17:59:59, 2019-12-30 18:00:11, 201... | 417 | [MaxSpeculation, ntfri, xrobertm, FinTwitTSLA,... | 294 | [Tesla, Israel, FSD, Recall, CryptoCorner, Blo... | 115 | [OptionsPastor, NHTSA, TheJusticeDept, FBI, SE... | 70 | [aapl, aapl, AAPL, AAPL, NIO, SPY, AAPL, G/L, ... | 257 |
| 15 | 2019-12-31 | [2019-12-31 18:09:23, 2019-12-31 17:56:23, 201... | 333 | [HulkCapital, JerryTauber, realsheepwolf, john... | 243 | [patent, tech, IP, research, Tesla, TeslaStock... | 106 | [AskGeorgeBailey, YoelMinkoff, TESLAcharts, bt... | 69 | [Apple, AAPL, Facebook, FB, ONTX, BNTX, ADBE, ... | 233 |
# Show the final per-day column layout
list(gdf.columns)
['Date', 'Time', 'No. of Tweets', 'Sender Tweeple List', 'No. of Unique Senders Tweeple', 'List of Hashtags', 'No. of Unique Hashtags', 'List of Mentioned Tweeple', 'No. of Unique Mentioned Tweeple', 'List of Named Entities', 'No. of Unique Named Entities']
def _daily_plot(col, color=None):
    """One 'per day' hvplot curve for a single count column of ``gdf``.

    Parameters
    ----------
    col : str
        A "No. of ..." count column of ``gdf`` to plot against Date.
    color : str or None
        Explicit curve colour; None keeps hvplot's default.
    """
    # .copy() so the Date reformatting cannot touch gdf and cannot raise
    # a SettingWithCopyWarning on the slice
    d = gdf[["Date", col]].copy()
    d['Date'] = pd.to_datetime(d['Date'], format='%Y-%m-%d')
    d["Date"] = d["Date"].dt.strftime('%Y-%m-%d')
    # titles match the originals, e.g. "No. of Tweets" -> "Number of Tweets per Day"
    sst = 'Number of %s per Day' % col.replace('No. of ', '')
    optkw = dict(title=sst, width=450, height=300, shared_axes=False,
                 tools=['hover'], xrotation=90, yformatter='%.0f')
    if color is not None:
        optkw['color'] = color
    return d.hvplot(x='Date', stacked=True, legend='top_left').relabel(sst).opts(**optkw)

# The five copy/paste plotting cells collapsed into one helper call each.
layout = hv.Layout(
    _daily_plot("No. of Tweets")
    + _daily_plot("No. of Unique Senders Tweeple", "orange")
    + _daily_plot("No. of Unique Hashtags", "green")
    + _daily_plot("No. of Unique Mentioned Tweeple", "red")
    + _daily_plot("No. of Unique Named Entities", "cyan")
).cols(2)
layout
# Tweeple who both sent tweets and were mentioned by others
ct=list(set(eodfd["Sender Tweeple"].unique()).intersection(set(AllMentions)))
# One-row summary of corpus-wide unique counts
data = {'No. of Tweets': len(eodf),
        'No. of Hashtags':len(set(AllHashtags)),
        'No. of Senders Tweeple': len(eodfd["Sender Tweeple"].unique()),
        'No. of Mentioned Tweeple': len(set(AllMentions)),
        'No. of Senders-Mentioned Tweeple':len(ct),
        'No. of Named Entities':len(set(flatli(eodfd["Named Entities List"].tolist()))) #len(set([e for e]))
       }
gdt = pd.DataFrame (data, columns = ["No. of Tweets","No. of Hashtags","No. of Senders Tweeple","No. of Mentioned Tweeple","No. of Senders-Mentioned Tweeple",'No. of Named Entities'], index=["Total"])
gdt
| No. of Tweets | No. of Hashtags | No. of Senders Tweeple | No. of Mentioned Tweeple | No. of Senders-Mentioned Tweeple | No. of Named Entities | |
|---|---|---|---|---|---|---|
| Total | 5000 | 1078 | 2144 | 562 | 94 | 2145 |
# Transpose the one-row summary into (index, count) rows for a bar chart
du1 = gdt.T
du1 = du1.reset_index()
du1
sst="Basic Counts (during %i days from %s to %s)" %(len(gdf),min(eodf['Time']),max(eodf['Time']))
# NOTE(review): 'plot' shadows plotly's plot() imported at the top of the
# file; name kept so any later cell referencing it keeps working
plot=du1.hvplot.bar(legend='top_right').opts(title=sst,width=800,height=500,color='index', cmap='Category10', yformatter='%.0f',xrotation=15)
xs, ys = plot.dimension_values(0), plot.dimension_values(1)
# place each value label slightly below the top of its bar
labels = hv.Labels((xs, ys-0.03*ys.max(), ys), kdims=plot.dimensions(), dataset=plot.dataset)
# .opts() replaces the __call__-style option setting, which holoviews
# warned is deprecated and removed in 1.14
labels = labels.opts(text_color="white")
plot * labels
WARNING:param.Labels04332: Use of __call__ to set options will be deprecated in the next major release (1.14.0). Use the equivalent .opts method instead.
# def connected_component_subgraphs(G):
# for c in nx.connected_components(G):
# yield G.subgraph(c)
def hvnx_plot(G,ctype,pos,width,height,node_size,node_cmap,edge_color,edge_line_width,
              title,bundled,nodelabels,xoffset,yoffset,
              arrowhead_length,selection_mode,selection_policy,
              edge_hover_line_color,node_hover_fill_color,
              fontsize,text_font_size, text_color,bgcolor):
    """Render graph G as an interactive HoloViews/hvPlot figure, colored by community.

    Control-flow parameters:
      ctype      -- 1: Girvan-Newman communities (directed or undirected G);
                    0: Louvain communities (undirected G only; for a directed
                    G this branch only prints a message and returns None).
      bundled    -- 1: route edges through bundle_graph(); 0: straight edges.
      nodelabels -- 1: overlay node-name labels shifted by (xoffset, yoffset).
    All remaining arguments are styling options forwarded to .opts().

    Side effects: community id, degree(s) and neighbor strings are written
    into the node attributes of G itself (they populate the hover tool).

    Returns the HoloViews graph element, possibly overlaid with labels.

    NOTE(review): selection_mode is accepted but never used in the body.
    NOTE(review): the directed branch stores the node attribute key
    'Girvan_Newman_community' while the undirected branch stores
    'Girvan-Newman_community' (hyphen) -- confirm consumers expect both keys.
    """
    if nx.is_directed(G)==True:
        # Hover text: comma-separated, sorted predecessor names per node.
        in_neighbors={} #arriving_airlines={}
        for n in G.nodes():
            t=list(G.predecessors(n))
            in_neighbors[n]=", ".join(sorted([x for x in t]))
        # print(in_neighbors)
        # Hover text: comma-separated, sorted successor names per node.
        out_neighbors={} #departing_airlines={}
        for n in G.nodes():
            t=list(G.successors(n))
            out_neighbors[n]=", ".join(sorted([x for x in t]))
        # print(out_neighbors)
        if ctype==1:
            # Girvan-Newman is hierarchical: advance to the SECOND split so
            # more than two communities can be obtained.
            communities_generator = community.girvan_newman(G)
            top_level_communities = next(communities_generator)
            next_level_communities = next(communities_generator)
            # Sort communities by size, largest first, so id 0 is the biggest.
            lc=sorted(sorted(map(sorted, next_level_communities)), key=len,reverse=True)
            partition={n:i for i,c in enumerate(lc) for n in c }
            print("No. of Girvan-Newman communities",len(set(partition.values())))
            for n in G.nodes():
                G.nodes[n]['Girvan_Newman_community'] = partition[n]
                # Self-loop special-casing kept for reference:
                # if (n,n) in G.edges():
                #     G.nodes[n]['in-degree'] = 0
                #     G.nodes[n]['in-neighbors'] = ""
                #     G.nodes[n]['out-degree'] = 0
                #     G.nodes[n]['out-neighbors'] = ""
                # else:
                G.nodes[n]['in-degree'] = G.in_degree(n)
                G.nodes[n]['in_neighbors'] = in_neighbors[n]
                # G.nodes[n]['arriving airlines'] = arriving_airlines[n]
                G.nodes[n]['out-degree'] = G.out_degree(n)
                G.nodes[n]['out_neighbors'] = out_neighbors[n]
                # G.nodes[n]['departing airlines'] = departing_airlines[n]
            graph = hvnx.draw(G, pos)
            graph.opts(edge_color=edge_color,edge_line_width=edge_line_width,node_size=node_size,node_color='Girvan_Newman_community',node_cmap=node_cmap)
            if bundled==0:
                graph.opts(selection_policy=selection_policy,title=title,edge_hover_line_color=edge_hover_line_color,node_hover_fill_color=node_hover_fill_color,fontsize=fontsize,width=width,height=height,arrowhead_length=arrowhead_length) #,tools=tools) #,'box_zoom',"tap"])
                if nodelabels==1:
                    labels = hv.Labels(graph.nodes, ['x', 'y'], 'index')
                    graph=(graph * labels.opts(xoffset=xoffset, yoffset=yoffset,text_font_size=text_font_size, text_color=text_color, bgcolor=bgcolor))
                    print(labels)  # debug output of the label element
                    return graph
                else:
                    return graph
            else:
                graph = bundle_graph(graph)
                graph.opts(selection_policy=selection_policy,title=title,edge_hover_line_color=edge_hover_line_color,node_hover_fill_color=node_hover_fill_color,fontsize=fontsize,width=width,height=height,arrowhead_length=arrowhead_length) #,tools=tools) #,'box_zoom',"tap"])
                if nodelabels==1:
                    labels = hv.Labels(graph.nodes, ['x', 'y'], 'index')
                    graph=(graph * labels.opts(xoffset=xoffset, yoffset=yoffset,text_font_size=text_font_size, text_color=text_color, bgcolor=bgcolor))
                    return graph
                else:
                    return graph
        if ctype==0:
            # Louvain requires an undirected graph; nothing is drawn here
            # (the function implicitly returns None).
            print("Louvain communities are not computable for directed graphs!")
    else:
        # Undirected case.
        # Hover text: comma-separated, sorted neighbor names per node.
        connections={}
        for n in G.nodes():
            t=list(G.neighbors(n))
            connections[n]=", ".join(sorted([x for x in t]))
        # Connected-component bookkeeping kept for reference:
        # connected_components={}
        # Gcc = sorted(connected_component_subgraphs(G), key = len, reverse=True)
        # ccl=[list(g.nodes) for g in Gcc]
        # # ccl=sorted(ccl, key=lambda item: len(item[1]), reverse=True)
        # for i,j in enumerate(ccl):
        #     for n in j:
        #         connected_components[n]=i
        if ctype==1:
            # Same second-split Girvan-Newman procedure as the directed branch.
            communities_generator = community.girvan_newman(G)
            top_level_communities = next(communities_generator)
            next_level_communities = next(communities_generator)
            lc=sorted(sorted(map(sorted, next_level_communities)), key=len,reverse=True)
            partition={n:i for i,c in enumerate(lc) for n in c }
            # print("No. of connected components",len(ccl))
            print("No. of Girvan-Newman communities",len(set(partition.values())))
            for n in G.nodes():
                G.nodes[n]['Girvan-Newman_community'] = partition[n]
                # Self-loop special-casing kept for reference:
                # if (n,n) in G.edges():
                #     G.nodes[n]['degree'] = 0
                #     G.nodes[n]['connections'] = ""
                #     # G.nodes[n]['connected_component'] = connected_components[n]
                # else:
                G.nodes[n]['degree'] = G.degree(n)
                G.nodes[n]['connections'] = connections[n] #", ".join([str(x) for x in list(G.neighbors(n))])
                # G.nodes[n]['connected_component'] = connected_components[n]
            # for n in G.nodes():
            #     G.nodes[n]['Louvain_community'] = partition[n]
            graph = hvnx.draw(G, pos)
            graph.opts(edge_color=edge_color,edge_line_width=edge_line_width,node_size=node_size,node_color='Girvan-Newman_community',node_cmap=node_cmap)
            graph.opts(padding=0.15)
            if bundled==0:
                graph.opts(selection_policy=selection_policy,title=title,edge_hover_line_color=edge_hover_line_color,node_hover_fill_color=node_hover_fill_color,fontsize=fontsize,width=width,height=height,arrowhead_length=arrowhead_length) #,tools=tools) #,'box_zoom',"tap"])
                if nodelabels==1:
                    labels = hv.Labels(graph.nodes, ['x', 'y'], 'index')
                    graph=(graph * labels.opts(xoffset=xoffset, yoffset=yoffset,text_font_size=text_font_size, text_color=text_color, bgcolor=bgcolor))
                    return graph
                else:
                    return graph
            else:
                graph = bundle_graph(graph)
                graph.opts(selection_policy=selection_policy,title=title,edge_hover_line_color=edge_hover_line_color,node_hover_fill_color=node_hover_fill_color,fontsize=fontsize,width=width,height=height,arrowhead_length=arrowhead_length) #,tools=tools) #,'box_zoom',"tap"])
                if nodelabels==1:
                    labels = hv.Labels(graph.nodes, ['x', 'y'], 'index')
                    graph=(graph * labels.opts(xoffset=xoffset, yoffset=yoffset,text_font_size=text_font_size, text_color=text_color, bgcolor=bgcolor))
                    return graph
                else:
                    return graph
        if ctype==0:
            # Louvain partition via python-louvain's best_partition.
            partition_l=louvain.best_partition(G)
            # print("No. of connected components",len(ccl))
            print("No. of Louvain communities",len(set(partition_l.values())))
            for n in G.nodes():
                G.nodes[n]['Louvain_community'] = partition_l[n]
                # A node carrying a self-loop is displayed as degree 0 with no
                # connections (only in this Louvain branch).
                if (n,n) in G.edges():
                    G.nodes[n]['degree'] = 0
                    G.nodes[n]['connections'] = ""
                    # G.nodes[n]['connected_component'] = connected_components[n]
                else:
                    G.nodes[n]['degree'] = G.degree(n)
                    G.nodes[n]['connections'] = connections[n] #", ".join([str(x) for x in list(G.neighbors(n))]) #connections
                    # G.nodes[n]['connected_component'] = connected_components[n]
            graph = hvnx.draw(G, pos)
            graph.opts(edge_color=edge_color,edge_line_width=edge_line_width,node_size=node_size,node_color='Louvain_community',node_cmap=node_cmap)
            graph.opts(padding=0.15)
            if bundled==0:
                graph.opts(selection_policy=selection_policy,title=title,edge_hover_line_color=edge_hover_line_color,node_hover_fill_color=node_hover_fill_color,fontsize=fontsize,width=width,height=height,arrowhead_length=arrowhead_length) #,tools=tools) #,'box_zoom',"tap"])
                if nodelabels==1:
                    labels = hv.Labels(graph.nodes, ['x', 'y'], 'index')
                    graph=(graph * labels.opts(xoffset=xoffset, yoffset=yoffset,text_font_size=text_font_size, text_color=text_color, bgcolor=bgcolor))
                    return graph
                else:
                    return graph
            else:
                graph = bundle_graph(graph)
                graph.opts(selection_policy=selection_policy,title=title,edge_hover_line_color=edge_hover_line_color,node_hover_fill_color=node_hover_fill_color,fontsize=fontsize,width=width,height=height,arrowhead_length=arrowhead_length) #,tools=tools) #,'box_zoom',"tap"])
                if nodelabels==1:
                    labels = hv.Labels(graph.nodes, ['x', 'y'], 'index')
                    graph=(graph * labels.opts(xoffset=xoffset, yoffset=yoffset,text_font_size=text_font_size, text_color=text_color, bgcolor=bgcolor))
                    return graph
                else:
                    return graph
# Build one undirected graph of co-occurring hashtags per day.
days = sorted(str(d)[:10] for d in eodfd["Date"].unique())
GGh = {}
for day in days:
    dfd = eodfd[eodfd["Date"] == day]
    # FIX: count unordered pairs directly. The previous MultiGraph round-trip
    # (Counter(meds) -> add_weighted_edges_from -> Counter(Ghm.edges()))
    # discarded the per-pair counts: the final 'weight' was just the number of
    # parallel orientations (1 or 2), not the co-occurrence frequency.
    pair_counts = Counter()
    for i in range(len(dfd)):
        hashtags = dfd.iloc[i]["Hashtags List"]
        if len(hashtags) > 1:
            for a, b in itertools.combinations(hashtags, 2):
                # Normalize orientation so (a, b) and (b, a) count as one edge.
                pair_counts[tuple(sorted((a, b)))] += 1
    # Same node and edge sets as before; weights now reflect true counts.
    Gh = nx.Graph((a, b, {'weight': w}) for (a, b), w in pair_counts.items())
    print("The graph of co-occurrent hashtags (in tweets) on %s has %i nodes and %i edges" %(day,len(Gh.nodes()),len(Gh.edges())))
    GGh[day] = Gh
The graph of co-occurrent hashtags (in tweets) on 2019-12-16 has 95 nodes and 350 edges The graph of co-occurrent hashtags (in tweets) on 2019-12-17 has 111 nodes and 346 edges The graph of co-occurrent hashtags (in tweets) on 2019-12-18 has 132 nodes and 403 edges The graph of co-occurrent hashtags (in tweets) on 2019-12-19 has 165 nodes and 837 edges The graph of co-occurrent hashtags (in tweets) on 2019-12-20 has 138 nodes and 730 edges The graph of co-occurrent hashtags (in tweets) on 2019-12-21 has 72 nodes and 280 edges The graph of co-occurrent hashtags (in tweets) on 2019-12-22 has 71 nodes and 276 edges The graph of co-occurrent hashtags (in tweets) on 2019-12-23 has 171 nodes and 808 edges The graph of co-occurrent hashtags (in tweets) on 2019-12-24 has 76 nodes and 201 edges The graph of co-occurrent hashtags (in tweets) on 2019-12-25 has 66 nodes and 397 edges The graph of co-occurrent hashtags (in tweets) on 2019-12-26 has 105 nodes and 269 edges The graph of co-occurrent hashtags (in tweets) on 2019-12-27 has 158 nodes and 597 edges The graph of co-occurrent hashtags (in tweets) on 2019-12-28 has 56 nodes and 297 edges The graph of co-occurrent hashtags (in tweets) on 2019-12-29 has 74 nodes and 237 edges The graph of co-occurrent hashtags (in tweets) on 2019-12-30 has 92 nodes and 236 edges The graph of co-occurrent hashtags (in tweets) on 2019-12-31 has 92 nodes and 224 edges
# Let us consider 4 days during which the number of tweets (nodes of the
# corresponding graph of hashtags) is moderately high:
fourDays=["2019-12-21","2019-12-22","2019-12-25","2019-12-28"]
# Styling / behavior knobs for hvnx_plot (ctype=1 -> Girvan-Newman coloring).
ctype=1
node_color="Girvan-Newman_community" #'Louvain_community' "connected_component"
bundled=1
nodelabels=0
width=400
height=400
node_size=2*np.log(5+hv.dim('degree')) #4*np.log(6+hv.dim('in-degree'))
node_cmap="tab10"
edge_line_width=1
edge_color='olive'
arrowhead_length=0.04
selection_mode='nodes'
selection_policy="nodes"
edge_hover_line_color='green'
node_hover_fill_color='red'
fontsize={'title': '9pt'}
text_font_size='4pt'
text_color='black'
bgcolor='white'
xoffset=0
yoffset=-0.03 #-15
# One plot per day, produced by the same call; unpacked afterwards so the
# g0..g3 names stay available to the rest of the notebook.
gplots=[]
for day in fourDays:
    Gh=GGh[day]
    title="%s" % day
    pos=graphviz_layout(Gh)
    gplots.append(hvnx_plot(Gh,ctype,pos,width,height,node_size,node_cmap,edge_color,edge_line_width,
                            title,bundled,nodelabels,xoffset,yoffset,
                            arrowhead_length,selection_mode,selection_policy,
                            edge_hover_line_color,node_hover_fill_color,
                            fontsize,text_font_size, text_color,bgcolor))
g0,g1,g2,g3=gplots
g0
layout = hv.Layout(g0 + g1 + g2 + g3).cols(2)
layout.opts(title="The graphs of co-occurrent hashtags (in tweets) on %i days (colored in Girvan-Newman communities)" %len(fourDays))
No. of Girvan-Newman communities 12 No. of Girvan-Newman communities 11 No. of Girvan-Newman communities 8 No. of Girvan-Newman communities 10
# Sanity check: rows in the last day's slice vs. number of grouped days.
print(f"{len(dfd)} {len(gdf)}")
333 16
# Directed multigraphs of mentions among tweeple restricted to ct (accounts
# that both send and are mentioned), one snapshot per day.
# NOTE(review): dmeds is initialised once OUTSIDE the loop and never reset,
# so each day's graph accumulates all edges up to and including that day --
# this matches the monotonically growing node/edge counts printed below.
GGm={}
dmeds=[]
for i in range(len(gdf)):
    # Keep only endpoints that belong to the sender-and-mentioned set ct.
    source=[x for x in gdf.iloc[i]["Sender Tweeple List"] if x in ct]
    target=[y for y in gdf.iloc[i]["List of Mentioned Tweeple"] if y in ct]
    day=str(gdf.iloc[i]["Date"])[:10]
    if len(source)>0 and len(target)>0:
        # One directed edge per (sender, mentioned) pair, tagged with its day.
        for x in source:
            for y in target:
                dmeds.append((x,y,{'day': day}))
    # Rebuild the cumulative graph after each row.
    Gm=nx.MultiDiGraph()
    Gm.add_edges_from(dmeds)
    # Drop self-mentions ...
    selfs=[e for e in Gm.edges() if e[0]==e[1]]
    Gm.remove_edges_from(selfs)
    # ... and any node left without edges afterwards.
    isols=[n for n in Gm.nodes() if Gm.in_degree(n)==0 and Gm.out_degree(n)==0]
    Gm.remove_nodes_from(isols)
    print("The graph of mentions among tweeple on %s has %i nodes and %i edges" %(day,len(Gm.nodes()),len(Gm.edges())))
    GGm[day]=Gm
# The graph of mentions among tweeple on 2019-12-01 has 0 nodes and 0 edges
# The graph of mentions among tweeple on 2019-12-03 has 0 nodes and 0 edges
# The graph of mentions among tweeple on 2019-12-10 has 0 nodes and 0 edges
# The graph of mentions among tweeple on 2019-12-11 has 2 nodes and 1 edges
# The graph of mentions among tweeple on 2019-12-12 has 3 nodes and 7 edges
# The graph of mentions among tweeple on 2019-12-13 has 3 nodes and 7 edges
# The graph of mentions among tweeple on 2019-12-16 has 5 nodes and 9 edges
# The graph of mentions among tweeple on 2019-12-17 has 5 nodes and 9 edges
# The graph of mentions among tweeple on 2019-12-18 has 5 nodes and 9 edges
# The graph of mentions among tweeple on 2019-12-23 has 5 nodes and 9 edges
# The graph of mentions among tweeple on 2019-12-24 has 5 nodes and 9 edges
# The graph of mentions among tweeple on 2019-12-31 has 5 nodes and 9 edges
# The graph of mentions among tweeple on 2020-01-02 has 5 nodes and 9 edges
# The graph of mentions among tweeple on 2020-01-03 has 12 nodes and 97 edges
# The graph of mentions among tweeple on 2020-01-04 has 14 nodes and 109 edges
# The graph of mentions among tweeple on 2020-01-05 has 18 nodes and 405 edges
# The graph of mentions among tweeple on 2020-01-06 has 19 nodes and 416 edges
# The graph of mentions among tweeple on 2020-01-07 has 24 nodes and 567 edges
# The graph of mentions among tweeple on 2020-01-13 has 99 nodes and 184375 edges
# The graph of mentions among tweeple on 2020-01-24 has 148 nodes and 189585 edges
# The graph of mentions among tweeple on 2020-01-31 has 1278 nodes and 19934302 edges
The graph of mentions among tweeple on 2019-12-16 has 29 nodes and 414 edges The graph of mentions among tweeple on 2019-12-17 has 52 nodes and 1194 edges The graph of mentions among tweeple on 2019-12-18 has 68 nodes and 2062 edges The graph of mentions among tweeple on 2019-12-19 has 73 nodes and 2324 edges The graph of mentions among tweeple on 2019-12-20 has 78 nodes and 2958 edges The graph of mentions among tweeple on 2019-12-21 has 82 nodes and 3025 edges The graph of mentions among tweeple on 2019-12-22 has 82 nodes and 3048 edges The graph of mentions among tweeple on 2019-12-23 has 87 nodes and 3989 edges The graph of mentions among tweeple on 2019-12-24 has 87 nodes and 4297 edges The graph of mentions among tweeple on 2019-12-25 has 87 nodes and 4342 edges The graph of mentions among tweeple on 2019-12-26 has 92 nodes and 4909 edges The graph of mentions among tweeple on 2019-12-27 has 93 nodes and 5396 edges The graph of mentions among tweeple on 2019-12-28 has 93 nodes and 5510 edges The graph of mentions among tweeple on 2019-12-29 has 94 nodes and 5731 edges The graph of mentions among tweeple on 2019-12-30 has 94 nodes and 6517 edges The graph of mentions among tweeple on 2019-12-31 has 94 nodes and 6970 edges
I'm going to reduce to a sample to make it more manageable
The graph of mentions among tweeple on 2019-12-16 has 172 nodes and 61371 edges
The graph of mentions among tweeple on 2019-12-17 has 242 nodes and 110449 edges
The graph of mentions among tweeple on 2019-12-18 has 284 nodes and 202622 edges
The graph of mentions among tweeple on 2019-12-19 has 295 nodes and 226202 edges
The graph of mentions among tweeple on 2019-12-20 has 316 nodes and 298395 edges
The graph of mentions among tweeple on 2019-12-21 has 323 nodes and 310966 edges
The graph of mentions among tweeple on 2019-12-22 has 327 nodes and 322064 edges
The graph of mentions among tweeple on 2019-12-23 has 341 nodes and 434267 edges
The graph of mentions among tweeple on 2019-12-24 has 349 nodes and 475948 edges
The graph of mentions among tweeple on 2019-12-25 has 351 nodes and 484423 edges
The graph of mentions among tweeple on 2019-12-26 has 357 nodes and 551368 edges
The graph of mentions among tweeple on 2019-12-27 has 362 nodes and 617550 edges
The graph of mentions among tweeple on 2019-12-28 has 363 nodes and 626133 edges
The graph of mentions among tweeple on 2019-12-29 has 363 nodes and 642636 edges
The graph of mentions among tweeple on 2019-12-30 has 366 nodes and 717960 edges
The graph of mentions among tweeple on 2019-12-31 has 367 nodes and 774880 edges
# Let us consider 4 days during which the number of tweets (nodes of the
# corresponding graph of mentions) is moderately high:
fourDays=["2019-12-16","2019-12-17","2019-12-18","2019-12-19"]
# Styling / behavior knobs for hvnx_plot (ctype=1 -> Girvan-Newman coloring;
# edges unbundled, fixed node size for these dense directed graphs).
ctype=1
node_color="Girvan-Newman_community" #'Louvain_community' "connected_component"
bundled=0
nodelabels=0
width=400
height=400
node_size=15 #15*np.log(5+hv.dim('in_degree')) #4*np.log(6+hv.dim('in-degree'))
node_cmap="tab10"
edge_line_width=1
edge_color='lightgreen'
arrowhead_length=0.04
selection_mode='nodes'
selection_policy="nodes"
edge_hover_line_color='green'
node_hover_fill_color='red'
fontsize={'title': '9pt'}
text_font_size='4pt'
text_color='black'
bgcolor='white'
xoffset=0
yoffset=-0.03 #-15
# One plot per day; unpacked afterwards so g0..g3 remain in scope.
gplots=[]
for day in fourDays:
    Gm=GGm[day]
    title="%s" % day
    pos=graphviz_layout(Gm)
    gplots.append(hvnx_plot(Gm,ctype,pos,width,height,node_size,node_cmap,edge_color,edge_line_width,
                            title,bundled,nodelabels,xoffset,yoffset,
                            arrowhead_length,selection_mode,selection_policy,
                            edge_hover_line_color,node_hover_fill_color,
                            fontsize,text_font_size, text_color,bgcolor))
g0,g1,g2,g3=gplots
g0
layout = hv.Layout(g0 + g1 + g2 + g3).cols(2)
layout.opts(title="The graphs of mentions among tweeple on %i days (colored in Girvan-Newman communities)" %len(fourDays))
No. of Girvan-Newman communities 3 No. of Girvan-Newman communities 3 No. of Girvan-Newman communities 3 No. of Girvan-Newman communities 3
# Build one undirected graph of co-occurring named entities per day.
GGn = {}
for day in days:
    dfd = eodfd[eodfd["Date"] == day]
    # FIX: count unordered pairs directly. The previous MultiGraph round-trip
    # (Counter(meds) -> add_weighted_edges_from -> Counter(Gnm.edges()))
    # discarded the per-pair counts: the final 'weight' was just the number of
    # parallel orientations (1 or 2), not the co-occurrence frequency.
    pair_counts = Counter()
    for i in range(len(dfd)):
        entities = dfd.iloc[i]["Named Entities List"]
        if len(entities) > 1:
            for a, b in itertools.combinations(entities, 2):
                # Normalize orientation so (a, b) and (b, a) count as one edge.
                pair_counts[tuple(sorted((a, b)))] += 1
    # Same node and edge sets as before; weights now reflect true counts.
    Gn = nx.Graph((a, b, {'weight': w}) for (a, b), w in pair_counts.items())
    print("The graph of co-occurrent named entities (in tweets) on %s has %i nodes and %i edges" %(day,len(Gn.nodes()),len(Gn.edges())))
    GGn[day] = Gn
The graph of co-occurrent named entities (in tweets) on 2019-12-16 has 233 nodes and 486 edges The graph of co-occurrent named entities (in tweets) on 2019-12-17 has 255 nodes and 562 edges The graph of co-occurrent named entities (in tweets) on 2019-12-18 has 293 nodes and 719 edges The graph of co-occurrent named entities (in tweets) on 2019-12-19 has 197 nodes and 536 edges The graph of co-occurrent named entities (in tweets) on 2019-12-20 has 236 nodes and 601 edges The graph of co-occurrent named entities (in tweets) on 2019-12-21 has 140 nodes and 271 edges The graph of co-occurrent named entities (in tweets) on 2019-12-22 has 115 nodes and 249 edges The graph of co-occurrent named entities (in tweets) on 2019-12-23 has 312 nodes and 692 edges The graph of co-occurrent named entities (in tweets) on 2019-12-24 has 190 nodes and 437 edges The graph of co-occurrent named entities (in tweets) on 2019-12-25 has 74 nodes and 137 edges The graph of co-occurrent named entities (in tweets) on 2019-12-26 has 240 nodes and 587 edges The graph of co-occurrent named entities (in tweets) on 2019-12-27 has 269 nodes and 614 edges The graph of co-occurrent named entities (in tweets) on 2019-12-28 has 118 nodes and 185 edges The graph of co-occurrent named entities (in tweets) on 2019-12-29 has 127 nodes and 247 edges The graph of co-occurrent named entities (in tweets) on 2019-12-30 has 231 nodes and 599 edges The graph of co-occurrent named entities (in tweets) on 2019-12-31 has 211 nodes and 529 edges
# Let us consider 4 days during which the number of tweets (nodes of the
# corresponding graph of named entities) is moderately high:
fourDays=["2019-12-21","2019-12-22","2019-12-25","2019-12-28"]
# Styling / behavior knobs for hvnx_plot (ctype=1 -> Girvan-Newman coloring).
ctype=1
node_color="Girvan-Newman_community" #'Louvain_community' "connected_component"
bundled=1
nodelabels=0
width=400
height=400
node_size=2*np.log(5+hv.dim('degree')) #4*np.log(6+hv.dim('in-degree'))
node_cmap="tab10"
edge_line_width=1
edge_color='olive'
arrowhead_length=0.04
selection_mode='nodes'
selection_policy="nodes"
edge_hover_line_color='green'
node_hover_fill_color='red'
fontsize={'title': '9pt'}
text_font_size='4pt'
text_color='black'
bgcolor='white'
xoffset=0
yoffset=-0.03 #-15
# FIX: this cell previously read GGh (the HASHTAG graphs) again while its
# title claims named entities; the printed community counts were identical to
# the hashtag cell, confirming the copy-paste error. It must use GGn, the
# named-entity graphs built above.
gplots=[]
for day in fourDays:
    Gn=GGn[day]
    title="%s" % day
    pos=graphviz_layout(Gn)
    gplots.append(hvnx_plot(Gn,ctype,pos,width,height,node_size,node_cmap,edge_color,edge_line_width,
                            title,bundled,nodelabels,xoffset,yoffset,
                            arrowhead_length,selection_mode,selection_policy,
                            edge_hover_line_color,node_hover_fill_color,
                            fontsize,text_font_size, text_color,bgcolor))
g0,g1,g2,g3=gplots
g0
layout = hv.Layout(g0 + g1 + g2 + g3).cols(2)
layout.opts(title="The graphs of co-occurrent named entities (in tweets) on %i days (colored in Girvan-Newman communities)" %len(fourDays))
No. of Girvan-Newman communities 12 No. of Girvan-Newman communities 11 No. of Girvan-Newman communities 8 No. of Girvan-Newman communities 10